transform

old TransE-like models
git clone https://esimon.eu/repos/transform.git
Log | Files | Refs | README

build dummy dataset.py (1471B)


      1 #!/usr/bin/env python2
      2 
      3 from __future__ import print_function
      4 import sys
      5 import os
      6 import shutil
      7 import random
      8 
      9 def construct_dummy_dataset(kind, prefix, n_entities, n_relations):
     10     os.mkdir(prefix)
     11 
     12     with open(prefix+'/entities', 'w') as file:
     13         for i in xrange(n_entities):
     14             file.write('E{0}\n'.format(i))
     15 
     16     with open(prefix+'/relations', 'w') as file:
     17         for i in xrange(n_relations):
     18             file.write('R{0}\n'.format(i))
     19 
     20     with open(prefix+'/train', 'w') as file:
     21         for r in xrange(n_relations):
     22             right = range(n_entities/2)
     23             random.shuffle(right)
     24             if kind=='id':
     25                 for e in xrange(n_entities):
     26                     file.write('{0}\t{1}\t{2}\n'.format(e, r, e))
     27             elif kind=='halfperm':
     28                 for e in xrange(n_entities/2):
     29                     file.write('{0}\t{1}\t{2}\n'.format(e, r, right[e]+n_entities/2))
     30             else:
     31                 raise error('Unknown kind')
     32 
     33     shutil.copyfile(prefix+'/train', prefix+'/valid')
     34     shutil.copyfile(prefix+'/train', prefix+'/test')
     35 
     36 if __name__ == '__main__':
     37     if len(sys.argv)<5:
     38         print('Usage: {0} {{id, halfperm}} dataset_name n_entities n_relations'.format(sys.argv[0]), file=sys.stderr)
     39         sys.exit(1)
     40     kind = sys.argv[1]
     41     prefix = sys.argv[2]
     42 
     43     n_entities = int(sys.argv[3])
     44     n_relations = int(sys.argv[4])
     45 
     46     construct_dummy_dataset(kind, prefix, n_entities, n_relations)